# define functions:

# Cosine similarity between one target embedding and selected rows of a
# pre-trained embedding matrix.
#
# Arguments:
#   target_embedding  vector; reshaped into a single-row matrix before the
#                     comparison.
#   pre_trained       matrix whose rows are selected with `candidates`.
#   candidates        one or more row selectors into `pre_trained`.
#   norm              forwarded unchanged to text2vec::sim2() (default "l2").
#
# Returns the value of text2vec::sim2(): one row per candidate and a single
# column holding the similarity to `target_embedding`.
#
# Raises an error when `candidates` is empty.
find_cos_sim <- function(target_embedding, pre_trained, candidates,
                         norm = "l2") {
  if (length(candidates) == 0) {
    stop("`candidates` must contain at least one element", call. = FALSE)
  }

  # drop = FALSE keeps a single selected row as a 1 x D matrix, so one code
  # path serves both the single- and the multi-candidate case.
  text2vec::sim2(
    x = pre_trained[candidates, , drop = FALSE],
    y = matrix(target_embedding, nrow = 1),
    method = "cosine",
    norm = norm
  )
}

# Cosine similarity between a target word's embedding and a set of candidate
# words, computed separately for every distinct value of a time variable.
#
# For each time unit (sorted unique values of x[[timevar]]) the function
#   1. collects the contexts of `target` with get_context(),
#   2. embeds them with embed_target() (a la carte, transform = TRUE,
#      aggregate = TRUE),
#   3. compares the resulting embedding to `candidates` with find_cos_sim().
# When step 2 or 3 fails for a time unit (e.g. zero instances of the target),
# that unit's similarities are left as NA instead of aborting the whole run.
#
# Arguments:
#   x                 object indexed with [[ ]] by `timevar` and `textvar`
#                     (e.g. a data.frame).
#   timevar, textvar  names of the time and text elements of `x`.
#   target            passed to get_context().
#   candidates        candidate words; become the column names of the result.
#   pre_trained       pre-trained embeddings, passed to embed_target() and
#                     find_cos_sim().
#   transform_matrix  passed to embed_target().
#   window, valuetype, case_insensitive, hard_cut
#                     passed to get_context().
#   verbose           passed to get_context() and embed_target(); when TRUE,
#                     time units that could not be processed are reported
#                     with message().
#
# Returns a data.frame with one row per time unit, one column per candidate
# and a final column `timevec` holding the time unit.
get_ts_cos_sim <- function(x, timevar, textvar, target, candidates,
                           pre_trained, transform_matrix,
                           window = 6, valuetype = "fixed",
                           case_insensitive = TRUE, hard_cut = FALSE,
                           verbose = TRUE) {

  if (is.null(x[[timevar]]) || is.null(x[[textvar]])) {
    stop("`timevar` and `textvar` must name existing elements of `x`",
         call. = FALSE)
  }

  timevec <- sort(unique(x[[timevar]]))

  # Preallocate one row per time unit; rows stay NA for units that fail.
  cos_sims <- matrix(NA_real_,
                     nrow = length(timevec),
                     ncol = length(candidates))

  for (i in seq_along(timevec)) {
    timevecunit <- timevec[[i]]

    # get context words for target
    contextftu <- get_context(x = subset(x[[textvar]],
                                         x[[timevar]] == timevecunit),
                              target = target,
                              window = window, valuetype = valuetype,
                              case_insensitive = case_insensitive,
                              hard_cut = hard_cut, verbose = verbose)

    # embed each instance using a la carte; errors (e.g. zero instances)
    # leave this time unit as NA
    embedded <- tryCatch(
      embed_target(context = contextftu$context, pre_trained,
                   transform_matrix, transform = TRUE,
                   aggregate = TRUE, verbose = verbose),
      error = function(e) e
    )
    if (inherits(embedded, "error")) {
      if (isTRUE(verbose)) {
        message("no embedding for time unit ", format(timevecunit), ": ",
                conditionMessage(embedded))
      }
      next
    }

    target_embedding <- embedded[["target_embedding"]]
    if (is.null(target_embedding)) {
      next
    }

    # get cosine similarities; errors leave this time unit as NA
    sims <- tryCatch(
      as.vector(find_cos_sim(target_embedding = target_embedding,
                             pre_trained,
                             candidates = candidates, norm = "l2")),
      error = function(e) e
    )
    if (inherits(sims, "error")) {
      if (isTRUE(verbose)) {
        message("no cosine similarity for time unit ", format(timevecunit),
                ": ", conditionMessage(sims))
      }
      next
    }

    cos_sims[i, ] <- sims
  }

  # The matrix carries no dimnames, so the data frame gets automatic row
  # names (1, 2, ...) regardless of how many time units there are.
  cos_simsdf <- as.data.frame(cos_sims)
  names(cos_simsdf) <- as.character(candidates)

  cos_simsdf <- cbind(cos_simsdf, timevec)

  cos_simsdf
}

#' Generate and select additional semantically related words from topical seed word(s)
#'
#' Averages the pre-trained embeddings of the seed words, retrieves the `N`
#' nearest neighbours of that average with `find_nns()`, and lets the user
#' choose among them through `utils::select.list()`.
#'
#' @param seed_words (character) vector - seed word(s); used to select rows of `pre_trained`
#' @param pre_trained (numeric) - a F x D matrix corresponding to pretrained embeddings.
#' F = number of features and D = embedding dimensions.
#' @param graphics (logical) - opens window for selection of words
#' @param N (numeric) - number of words to generate for selection
#' @param norm (character) - one of c("l2", "none"): how to scale input matrices.
#' If they are already scaled - use "none" (see ?sim2). Forwarded to `find_nns()`.
#'
#' @return a character vector comprising selected words
#' @export
#'
#' @examples
#' nns_selected <- label_propagate(seed_words =  c("president", "executive"),
#'                                 pre_trained = cr_glove_subset, N = 10,
#'                                 graphics = TRUE)
label_propagate <- function(seed_words,
                            pre_trained,
                            graphics = FALSE,
                            N = 10,
                            norm = "l2") {
  if (length(seed_words) == 0) {
    stop("`seed_words` must contain at least one word", call. = FALSE)
  }

  # Fail early with a readable message instead of "subscript out of bounds".
  vocab <- rownames(pre_trained)
  if (is.character(seed_words) && !is.null(vocab)) {
    missing_words <- setdiff(seed_words, vocab)
    if (length(missing_words) > 0) {
      stop("seed word(s) not found in `pre_trained`: ",
           paste(missing_words, collapse = ", "),
           call. = FALSE)
    }
  }

  # take column average of the seed-word embeddings; drop = FALSE keeps a
  # single seed word as a 1 x D matrix, unname() yields a plain vector
  seed_vecs <- pre_trained[seed_words, , drop = FALSE]
  sw_vec <- unname(Matrix::colMeans(seed_vecs))

  # find nearest neighbours, honouring the caller's `norm`
  nns <- find_nns(
    target_embedding = sw_vec,
    pre_trained = pre_trained,
    N = N,
    candidates = NULL,
    norm = norm
  )

  # open selector window if graphics is TRUE, text menu otherwise
  nns_selected <- utils::select.list(nns,
                                     multiple = TRUE,
                                     title = "select your words",
                                     graphics = isTRUE(as.logical(graphics)))

  nns_selected
}
